home *** CD-ROM | disk | FTP | other *** search
- /*
- * Bawk main program
- */
- #define MAIN 1
-
- #include <stdio.h>
-
- #include "bawk.h"
-
- /*
- * Main program
- */
- main( argc, argv )
- int argc;
- char **argv;
- {
- void newfile();
-
- char gotrules, didfile, getstdin;
-
- getstdin =
- didfile =
- gotrules = 0;
-
- /*
- * Initialize global variables:
- */
- Beginact = (char *) NULL;
- Endact = (char *) NULL;
- Rules = (RULE *) NULL;
- Rulep = (RULE *) NULL;
- #ifdef DEBUG
- Debug = (char) NULL;
- #endif
- Filename = (char *) NULL;
- Linecount = 0;
- Saw_break = (char) NULL;
- Stackptr = Stackbtm - 1;
- Stacktop = Stackbtm + MAXSTACKSZ;
- Nextvar = Vartab;
-
- strcpy( Fieldsep, " \t" );
- strcpy( Recordsep, "\n" );
-
- /*
- * Parse command line
- */
- while ( --argc ) {
- if ( **(++argv) == '-' ) {
- /*
- * Process dash options.
- */
- switch ( tolower( *(++(*argv)) ) ) {
- #ifdef DEBUG
- case 'd':
- ++Debug;
- break;
- #endif
- case 0:
- ++getstdin;
- --argv;
- goto dosomething;
- break;
- default:
- usage();
- }
- }
- else {
- dosomething:
- if ( gotrules ) {
- /*
- * Already read rules file - assume this is
- * is a text file for processing.
- */
- if ( ++didfile == 1 && Beginact ) doaction( Beginact );
- if ( getstdin ) {
- --getstdin;
- newfile( 0 );
- }
- else newfile( *argv );
- process();
- }
- else {
- /*
- * First file name argument on command line
- * is assumed to be a rules file - attempt to
- * compile it.
- */
- if ( getstdin ) {
- --getstdin;
- newfile( 0 );
- }
- else newfile( *argv );
- compile();
- gotrules = 1;
- }
- }
- }
- if ( !gotrules ) usage();
- if ( ! didfile ) {
- /*
- * Didn't process any files yet - process stdin.
- */
- newfile( 0 );
- if ( Beginact ) doaction( Beginact );
- process();
- }
- if ( Endact ) doaction( Endact );
- }
-
- /*
- * Regular expression/action file compilation routines.
- */
- compile() {
- /*
- * Compile regular expressions and C actions into Rules struct,
- * reading from current input file "Fileptr".
- */
- int c, len;
-
- #ifdef DEBUG
- if ( Debug ) error( "compiling...", 0 );
- #endif
- while ( (c = getcharacter()) != -1 ) {
- if ( c==' ' || c=='\t' || c=='\n' ) /* swallow whitespace */
- ;
- else if ( c=='#' ) {
- /*
- * Swallow comments
- */
- while ( (c=getcharacter()) != -1 && c!='\n' )
- ;
- }
- else if ( c=='{' ) {
- #ifdef DEBUG
- if ( Debug ) error( "action", 0 );
- #endif
- /*
- * Compile (tokenize) the action string into our
- * global work buffer, then allocate some memory
- * for it and copy it over.
- */
- ungetcharacter( '{' );
- len = act_compile( Workbuf );
-
- if ( Rulep && Rulep->action ) {
- Rulep->nextrule = (RULE *) getmem( sizeof( *Rulep ) );
- Rulep = Rulep->nextrule;
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- if ( !Rulep ) {
- /*
- * This is the first action encountered.
- * Allocate the first Rules structure and
- * initialize it
- */
- Rules = Rulep = (RULE *) getmem( sizeof( *Rulep ) );
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- Rulep->action = getmem( len );
- movemem( Workbuf, Rulep->action, len );
- }
- else if ( c==',' ) {
- #ifdef DEBUG
- if ( Debug ) error( "stop pattern", 0 );
- #endif
- /*
- * It's (hopefully) the second part of a two-part
- * pattern string. Swallow the comma and start
- * compiling an action string.
- */
- if ( !Rulep || !Rulep->pattern.start )
- error( "stop pattern without a start", RE_ERROR );
- if ( Rulep->pattern.stop )
- error( "already have a stop pattern", RE_ERROR );
- len = pat_compile( Workbuf );
- Rulep->pattern.stop = getmem( len );
- movemem( Workbuf, Rulep->pattern.stop, len );
- }
- else {
- /*
- * Assume it's a regular expression pattern
- */
- #ifdef DEBUG
- if ( Debug ) error( "start pattern", 0 );
- #endif
- ungetcharacter( c );
- len = pat_compile( Workbuf );
-
- if ( *Workbuf == T_BEGIN ) {
- /*
- * Saw a "BEGIN" keyword - compile following
- * action into special "Beginact" buffer.
- */
- len = act_compile( Workbuf );
- Beginact = getmem( len );
- movemem( Workbuf, Beginact, len );
- continue;
- }
- if ( *Workbuf == T_END ) {
- /*
- * Saw an "END" keyword - compile following
- * action into special "Endact" buffer.
- */
- len = act_compile( Workbuf );
- Endact = getmem( len );
- movemem( Workbuf, Endact, len );
- continue;
- }
- if ( Rulep ) {
- /*
- * Already saw a pattern/action - link in
- * another Rules structure.
- */
- Rulep->nextrule = (RULE *) getmem( sizeof( *Rulep ) );
- Rulep = Rulep->nextrule;
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- if ( !Rulep ) {
- /*
- * This is the first pattern encountered.
- * Allocate the first Rules structure and
- * initialize it
- */
- Rules = Rulep = (RULE *) getmem( sizeof( *Rulep ) );
- fillmem( Rulep, sizeof( *Rulep ), 0 );
- }
- if ( Rulep->pattern.start )
- error( "already have a start pattern", RE_ERROR );
- Rulep->pattern.start = getmem( len );
- movemem( Workbuf, Rulep->pattern.start, len );
- }
- }
- endfile();
- }
-
- /*
- * Text file main processing loop.
- */
- process() {
- /*
- * Read a line at a time from current input file at "Fileptr",
- * then apply each rule in the Rules chain to the input line.
- */
- int i;
-
- #ifdef DEBUG
- if ( Debug ) error( "processing...", 0 );
- #endif
- Recordcount = 0;
- while ( getline() ) {
- /*
- * Parse the input line.
- */
- Fieldcount = parse( Linebuf, Fields, Fieldsep );
- #ifdef DEBUG
- if ( Debug>1 ) {
- printf( "parsed %d words:\n", Fieldcount );
- for(i=0; i<Fieldcount; ++i ) printf( "<%s>\n", Fields[i] );
- }
- #endif
- Rulep = Rules;
- do {
- if ( ! Rulep->pattern.start ) {
- /*
- * No pattern given - perform action on
- * every input line.
- */
- doaction( Rulep->action );
- }
- else if ( Rulep->pattern.startseen ) {
- /*
- * Start pattern already found - perform
- * action then check if line matches
- * stop pattern.
- */
- doaction( Rulep->action );
- if ( dopattern( Rulep->pattern.stop ) )
- Rulep->pattern.startseen = 0;
- }
- else if ( dopattern( Rulep->pattern.start ) ) {
- /*
- * Matched start pattern - perform action.
- * If a stop pattern was given, set "start
- * pattern seen" flag and process every input
- * line until stop pattern found.
- */
- doaction( Rulep->action );
- if ( Rulep->pattern.stop )
- Rulep->pattern.startseen = 1;
- }
- }
- while ( (Rulep = Rulep->nextrule) != 0 );
- /*
- * Release memory allocated by parse().
- */
- while ( Fieldcount ) free( Fields[ --Fieldcount ] );
- }
- }
-
- /*
- * Miscellaneous functions
- */
- parse( str, wrdlst, delim )
- char *str;
- char *wrdlst[];
- char *delim;
- {
- /*
- * Parse the string of words in "str" into the word list at "wrdlst".
- * A "word" is a sequence of characters delimited by one or more
- * of the characters found in the string "delim".
- * Returns the number of words parsed.
- * CAUTION: the memory for the words in "wrdlst" is allocated
- * by malloc() and should eventually be returned by free()...
- */
- int wrdcnt, wrdlen;
- char wrdbuf[ MAXLINELEN ], c;
-
- wrdcnt = 0;
- while ( *str ) {
- while ( instr( *str, delim ) ) ++str;
- if ( !*str ) break;
- wrdlen = 0;
- while ( ((c = *str) != 0) && !instr( c, delim ) ) {
- wrdbuf[ wrdlen++ ] = c;
- ++str;
- }
- wrdbuf[ wrdlen++ ] = 0;
- /*
- * NOTE: allocate a MAXLINELEN sized buffer for every
- * word, just in case user wants to copy a larger string
- * into a field.
- */
- wrdlst[ wrdcnt ] = getmem( MAXLINELEN );
- strcpy( wrdlst[ wrdcnt++ ], wrdbuf );
- }
- return wrdcnt;
- }
-
- unparse( wrdlst, wrdcnt, str, delim )
- char *wrdlst[];
- int wrdcnt;
- char *str;
- char *delim;
- {
- /*
- * Replace all the words in "str" with the words in "wrdlst",
- * maintaining the same word seperation distance as found in
- * the string.
- * A "word" is a sequence of characters delimited by one or more
- * of the characters found in the string "delim".
- */
- int wc;
- char strbuf[ MAXLINELEN ], *sp, *wp, *start;
-
- wc = 0; /* next word in "wrdlst" */
- sp = strbuf; /* points to our local string */
- start = str; /* save start address of "str" for later... */
- while ( *str ) {
- /*
- * Copy the field delimiters from the original string to
- * our local version.
- */
- while ( instr( *str, delim ) ) *sp++ = *str++;
- if ( !*str ) break;
- /*
- * Skip over the field in the original string and...
- */
- while ( *str && !instr( *str, delim ) ) ++str;
- if ( wc < wrdcnt ) {
- /*
- * ...copy in the field in the wordlist instead.
- */
- wp = wrdlst[ wc++ ];
- while ( *wp )
- *sp++ = *wp++;
- }
- }
- /*
- * Tie off the local string, then copy it back to caller's string.
- */
- *sp = 0;
- strcpy( start, strbuf );
- }
-
/*
 * Return 1 if the character "c" occurs in the string "s", else 0.
 * The terminating NUL of "s" is never matched, so instr( 0, s ) is 0.
 */
int
instr( c, s )
char c, *s;
{
    for ( ; *s; ++s )
        if ( *s == c ) return 1;
    return 0;
}
-
- char *
- getmem( len )
- unsigned len;
- {
- char *cp;
-
- if ( (cp = (char *) malloc( len )) != 0 ) return cp;
- error( "out of memory", MEM_ERROR );
- }
-
- void
- newfile( s )
- char *s;
- {
- Linecount = 0;
- if ( (Filename = s) != 0 ) {
- #ifdef BDS_C
- if ( fopen( s, Fileptr = Curfbuf ) == -1 )
- #else
- if ( (Fileptr = fopen( s, "r" )) == 0)
- #endif
- error( "file not found", FILE_ERROR );
- }
- else {
- /*
- * No file name given - process standard input.
- */
- Fileptr = stdin;
- Filename = "standard input";
- }
- }
-
- getline() {
- /*
- * Read a line of text from current input file. Strip off
- * trailing record seperator (newline).
- */
- int rtn, len;
-
- for ( len=0; len<MAXLINELEN; ++len ) {
- if ( (rtn = getcharacter()) == *Recordsep || rtn == -1 ) break;
- Linebuf[ len ] = rtn;
- }
- Linebuf[ len ] = 0;
- if ( rtn == -1 ) {
- endfile();
- return 0;
- }
- return 1;
- }
-
- getcharacter() {
- /*
- * Read a character from curren input file.
- * WARNING: your getc() must convert lines that end with CR+LF
- * to LF and CP/M's EOF character (^Z) to a -1.
- * Also, getc() must return a -1 when attempting to read from
- * an unopened file.
- */
- int c;
-
- #ifdef BDS_C
- /*
- * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions <gag>
- */
- if ( (c = getc( Fileptr )) == '\r' ) {
- if ( (c = getc( Fileptr )) != '\n' ) {
- ungetc( c );
- c = '\r';
- }
- }
- else if ( c == 26 ) c = -1; /* ^Z */
- #else
- c = getc( Fileptr );
- #endif
- if ( c == *Recordsep ) ++Recordcount;
- if ( c=='\n' ) ++Linecount;
- return c;
- }
-
- ungetcharacter( c ) {
- /*
- * Push a character back into the input stream.
- * If the character is a record seperator, or a newline character,
- * the record and line counters are adjusted appropriately.
- */
- if ( c == *Recordsep ) --Recordcount;
- if ( c=='\n' ) --Linecount;
- return ungetc( c, Fileptr );
- }
-
- endfile() {
- fclose( Fileptr );
- Filename = (char *) Linecount = 0;
- }
-
- error( s, severe )
- char *s;
- int severe;
- {
- char *cp, *errat;
-
- if ( Filename ) fprintf( stderr, "%s:", Filename );
- if ( Linecount ) fprintf( stderr, " line %d:", Linecount );
- fprintf( stderr, " %s\n", s );
- if ( severe ) exit( severe );
- }
-
- usage() {
- error( "Usage: bawk <actfile> [<file> ...]\n", USAGE_ERROR );
- }
-
/*
 * Copy "count" bytes from "from" to "to", lowest address first.
 * A zero or negative count copies nothing.  Because the copy runs
 * front to back, a destination that starts inside the source region
 * is not copied correctly.
 * Always returns 0.
 */
int
movemem( from, to, count )
char *from, *to;
int count;
{
    for ( ; count > 0; --count )
        *to++ = *from++;
    return 0;
}
-
/*
 * Store "value" into the first "count" bytes of "array".
 * A zero or negative count stores nothing.
 * Always returns 0.
 */
int
fillmem( array, count, value )
char *array, value;
int count;
{
    for ( ; count > 0; --count )
        *array++ = value;
    return 0;
}
-
/*
 * Compare at most "n" characters of the strings "s" and "t".
 * Returns zero when they are equal over that span, a negative value
 * when "s" sorts first and a positive value when "t" sorts first.
 * Characters are compared as unsigned char, and a count of zero (or
 * less) compares equal, matching the standard library routine of the
 * same name that this definition replaces.
 */
int
strncmp( s, t, n )
char *s, *t;
int n;
{
    if ( n <= 0 ) return 0;

    /* stop on the n-th character, at the end of "s", or at a mismatch */
    while ( --n > 0 && *s && *s == *t ) {
        ++s;
        ++t;
    }
    return (int) (unsigned char) *s - (int) (unsigned char) *t;
}
-
/*
 * Return 1 if "c" is one of the characters '0' through '9', else 0.
 */
int
num( c )
char c;
{
    return c >= '0' && c <= '9';
}
-
/*
 * Return 1 if "c" is a lower-case letter 'a'-'z', an upper-case
 * letter 'A'-'Z' or an underscore, else 0.
 */
int
alpha( c )
char c;
{
    if ( c == '_' ) return 1;
    if ( c >= 'a' && c <= 'z' ) return 1;
    return c >= 'A' && c <= 'Z';
}
-
/*
 * Return 1 if "c" is accepted by alpha() or by num(), else 0.
 */
int
alphanum( c )
char c;
{
    return alpha( c ) || num( c );
}